#### setting environment ####
require(quanteda)
require(stm)
require(stringi)

# names of the ten newspapers under study, listed alphabetically; this order
# fixes the row order of every newspaper-level matrix built below
newspaper.names <- c(
  "Asahi", "Chugoku", "Chunichi", "Hokkaido", "Kahoku",
  "Mainichi", "Nikkei", "Nishinippon", "Sankei", "Yomiuri"
)

#### data preparation ####
# document-feature matrix of newspaper editorials published from October 1, 2017 to September 30, 2018
load("Study2_dfm.Rdata")
# estimation results of the 65-topic model
load("STM_result_65.Rdata")
# load() restores objects silently under the names they were saved with, so
# fail fast here if the objects used below are missing or do not line up:
# the per-topic weighting multiplies the dfm element-wise by a matrix with one
# row per row of STM.result$theta, which requires equal document counts
stopifnot(
  exists("Study2.dfm"),
  exists("STM.result"),
  nrow(STM.result$theta) == nrow(Study2.dfm)
)

#### estimate topic-specific positions with Wordfish ####
## estimate the Wordfish model for each topic
# Sizes are derived from the inputs rather than hard-coded; with the ten
# newspapers and the 65-topic model used in this script they are 10 and 65.
n.newspapers <- length(newspaper.names)
n.topics <- ncol(STM.result$theta)
n.features <- ncol(Study2.dfm)
# newspaper of each document (loop-invariant, so it is looked up only once)
doc.newspaper <- Study2.dfm@docvars$newspaper

# estimated positions: one row per newspaper, one column per topic
topic.specific.positions <- matrix(NA, n.newspapers, n.topics)
rownames(topic.specific.positions) <- newspaper.names
# fitted Wordfish models, one list element per topic
Study2.wordfish.result <- vector("list", n.topics)
for (i in seq_len(n.topics)) {
  # document-feature matrix for topic i: every document's term frequencies
  # are multiplied by that document's value in column i of STM.result$theta
  weighted.dfm <- Study2.dfm *
    tcrossprod(STM.result$theta[, i], rep(1, n.features))
  # compress the document-feature matrix at the newspaper level by summing
  # the weighted frequencies over each newspaper's documents and rounding
  # them to whole counts
  dfm.matrix <- matrix(NA, n.newspapers, n.features)
  rownames(dfm.matrix) <- newspaper.names
  colnames(dfm.matrix) <- colnames(Study2.dfm)
  for (j in seq_len(n.newspapers)) {
    in.newspaper <- doc.newspaper == newspaper.names[j]
    dfm.matrix[j, ] <- round(colSums(weighted.dfm[in.newspaper, ]))
  }
  # keep only the words used by at least two newspapers; a logical mask is
  # used because negative indexing with an empty which() result would drop
  # every column when no word needs to be removed
  used.by.two <- colSums(dfm.matrix > 0) >= 2
  dfm.matrix <- dfm.matrix[, used.by.two, drop = FALSE]
  # convert the matrix to a dfm object through one pseudo text per newspaper
  # in which every word is repeated as many times as its count; the leading
  # "" reproduces the leading space of a text built up by repeated paste()
  words <- colnames(dfm.matrix)
  pseudo.text <- vapply(seq_len(n.newspapers), function(j) {
    repeated <- vapply(seq_along(words), function(k) {
      paste(rep(words[k], dfm.matrix[j, k]), collapse = " ")
    }, character(1))
    paste(c("", repeated), collapse = " ")
  }, character(1))
  # "fastestword" tokenization splits on spaces only, so the words above are
  # recovered unchanged
  Wordfish.dfm <- dfm(pseudo.text, what = "fastestword")
  # estimate the Wordfish model (seed reset so each topic is reproducible)
  set.seed(12345)
  Study2.wordfish.result[[i]] <- textmodel_wordfish(Wordfish.dfm, sparse = TRUE)
  # record topic-specific positions of the newspapers
  topic.specific.positions[, i] <- Study2.wordfish.result[[i]]$theta
  print(paste0("Wordfish on Topic ", i, " is finished at ", date(), "."))
}

# save(Study2.wordfish.result, topic.specific.positions, file = "Study2_Wordfish_result.Rdata")